#!/usr/bin/env perl
use warnings;
use strict;

# Look up neighbour entries for a single neighbours_\d+ file.
# Neighbours with exactly zero similarity are not printed, because this usually
# means that we don't have any data for the movie. 
# Takes the neighbours file on stdin (see src/neighbours.cpp for format).
# Takes the titles file as its only argument (one title per line - see
# netflix_titles in Makefile).
# Neighbours with names are written to stdout (comma-separated values in the
# same order as the format in src/neighbours.cpp, but with a title before each
# id).
#

# Exactly one argument is required: the titles file (one title per line, where
# line N holds the title of movie id N).
scalar(@ARGV) == 1 or die "need titles file as only argument";
my $titles_file = $ARGV[0];

# Three-argument open with an explicit check: a missing or unreadable file is
# reported together with the OS error, and characters in the file name can
# never be misinterpreted as an open mode.
open(my $fh, '<', $titles_file)
  or die "cannot open titles file $titles_file: $!";
my @titles = <$fh>;
close($fh);
chomp @titles;

# Return the title for a 1-based movie id.
# Arguments: reference to the titles array, movie id.
# Returns the title, or an empty string (after a warning) when the id has no
# line in the titles file. The explicit range check matters because id 0 would
# otherwise become index -1, which Perl resolves to the *last* title.
sub title_for {
  my ($titles, $id) = @_;
  if ($id < 1 || $id > scalar(@$titles)) {
    warn "no title for movie id $id";
    return '';
  }
  return $titles->[$id - 1];
}

# Each stdin line has the form "movie_id:neighbour similarity neighbour ...".
while (my $line = <STDIN>) {
  my ($movie_id, $neighbours) = $line =~ /(\d+):(.*)/
    or die "unexpected line format in neighbours file: $line";
  $movie_id = int($movie_id);
  my @neighbour_list = split ' ', $neighbours;

  # The list must consist of (neighbour id, similarity) pairs; a trailing id
  # without a similarity cannot be interpreted, so it is reported and dropped.
  if (scalar(@neighbour_list) % 2 != 0) {
    warn "ignoring unpaired trailing value for movie id $movie_id";
    pop @neighbour_list;
  }

  # Output starts with the reference movie: title, then id.
  my @fields = (title_for(\@titles, $movie_id), $movie_id);

  # Consume the list two values at a time: (neighbour id, similarity).
  while (my ($raw_id, $similarity) = splice(@neighbour_list, 0, 2)) {
    # Skip entries with similarity exactly zero (probably no data).
    next if $similarity == 0.0;

    my $neighbour_id = int($raw_id);
    push @fields, title_for(\@titles, $neighbour_id), $neighbour_id,
      $similarity;
  }

  print join(',', @fields), "\n";
}

# Copyright (c) 2009 John Lees-Miller
# 
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

